#!/usr/bin/env bash

# DESCRIPTION:
#
#   Provides the content encoding the specified
#   resources are served with.
#
# USAGE:
#
#   httpcompression URL ...
#
# USEFUL LINKS:
#
#   * HTTP/1.1 (RFC 2616) - Content-Encoding
#     https://tools.ietf.org/html/rfc2616#section-14.11
#
#   * SDCH Specification:
#     https://lists.w3.org/Archives/Public/ietf-http-wg/2008JulSep/att-0441/Shared_Dictionary_Compression_over_HTTP.pdf

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# Options shared by every cURL request made by this script.
declare -ra CURL_DEFAULT_OPTIONS=(

    --connect-timeout 30

    --header "Accept-Encoding: gzip, deflate, sdch"

    # Prevent intermediate proxies from caching the response
    --header "Cache-Control: no-cache"

    # If the page was moved to a different location, redo the request
    --location

    --max-time 150
    --show-error
    --silent

    # Send a fake UA string for sites that sniff it instead of using
    # the `Accept-Encoding` header
    --user-agent "Mozilla/5.0 Gecko"

)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

check_for_sdch() {

    # Checks whether the given response headers advertise any SDCH
    # dictionaries (`Get-Dictionary` header) and, if they do, repeats
    # the request advertising those dictionaries back to the server.
    #
    #   $1 - the response headers of the "current" URL
    #   $2 - the "current" URL
    #
    # Prints the headers of the repeated request if at least one
    # dictionary was advertised, otherwise the given headers unchanged.

    declare availDicts=""
    declare currentHeaders="$1"
    declare dict=""
    declare dictClientID=""
    declare -a dictList=()
    declare dicts=""
    declare i=""
    declare url="$2"

    # Check if the server advertised any dictionaries
    #
    # Everything after the first `:` is kept (so that absolute
    # dictionary URLs are not cut at `http:`), the carriage return
    # that ends the header line is dropped, and the separators are
    # normalized to spaces

    dicts="$( printf "%s" "$currentHeaders" |
              grep -i '^Get-Dictionary:' |
              cut -d':' -f2- |
              tr -d '\r' |
              tr ',\n' '  ' )"

    # Split the list into an array instead of relying on an unquoted
    # expansion, so that locations containing `?` or `*` are not
    # subject to pathname expansion

    IFS=$' \t\n' read -r -a dictList <<< "$dicts"

    # Nothing advertised, so there is nothing to check

    if [ "${#dictList[@]}" -eq 0 ]; then
        printf "%s" "$currentHeaders"
        return
    fi

    # Otherwise, check to see if the server really supports SDCH

    for i in "${dictList[@]}"; do

        dict=""

        # Check if the dictionary location is specified as a path,
        # and if it is, construct its URL from the scheme and host
        # name of the referrer URL

        [[ "$i" != http* ]] \
            && dict="$( printf "%s" "$url" |
                        sed -En 's/([^/]*\/\/)?([^/]*)\/?.*/\1\2/p' )"

        dict="$dict$i"

        # Request the dictionaries from the server and
        # construct the `Avail-Dictionary` header value
        #
        # [ The user agent identifier for a dictionary is defined
        #   as the URL-safe base64 encoding (as described in RFC
        #   3548, section 4 [RFC3548]) of the first 48 bits (bits
        #   0..47) of the dictionary's SHA-256 digest ]
        #
        # 48 bits are exactly the first 8 base64 characters, and
        # every `+` and `/` among them has to be replaced in order
        # for the value to be URL-safe

        dictClientID="$( curl "${CURL_DEFAULT_OPTIONS[@]}" "$dict" |
                         openssl dgst -sha256 -binary |
                         openssl base64 |
                         cut -c 1-8 |
                         tr '+/' '-_' )"

        availDicts="${availDicts:+$availDicts,}$dictClientID"

    done

    # Redo the request (advertising the available dictionaries)
    # and replace the old resulted headers with the new ones
    #
    # The response is printed through `%s` so that it is never
    # interpreted as a `printf` format string

    printf "%s" "$( curl "${CURL_DEFAULT_OPTIONS[@]}" \
                         -H "Avail-Dictionary: $availDicts" \
                         --dump-header - \
                         --output /dev/null \
                         "$url" )"

}

get_content_encoding() {

    # Prints the content encoding the given URL is served with,
    # followed by one indented line for every redirect cURL followed.
    #
    #   $1 - the URL to request
    #
    # Prints nothing and returns cURL's exit status if the request
    # fails.

    declare currentHeaders=""
    declare encoding=""
    declare headers=""
    declare indent=""
    declare origin=""
    declare tmp=""
    declare url="$1"

    headers="$( curl "${CURL_DEFAULT_OPTIONS[@]}" \
                     --dump-header - \
                     --output /dev/null \
                     "$url" )" \
        || return

    # Iterate over the headers of all redirects
    while [ -n "$headers" ]; do

        # Get headers for the "current" URL
        currentHeaders="$( printf "%s" "$headers" | sed -n '1,/^HTTP/p' )"

        # Nothing could be extracted, so nothing would be removed
        # below; stop instead of looping forever
        [ -n "$currentHeaders" ] || break

        # Remove the headers for the "current" URL
        headers="${headers/"$currentHeaders"/}"

        currentHeaders="$(check_for_sdch "$currentHeaders" "$url")"

        # Get the value of the `Content-Encoding` header
        #
        # (header names are case-insensitive, the space after the
        #  colon is optional, and several codings / header lines
        #  are joined into one comma-separated list)

        encoding="$( printf "%s" "$currentHeaders" |
                     grep -i '^Content-Encoding:' |
                     cut -d':' -f2- |
                     tr -d ' \t\r' |
                     paste -s -d ',' - )"

        # Print the output for the "current" URL
        #
        # (the URL is passed as an argument and never as part of
        #  the format string, as it may contain `%` sequences)

        [ -n "$encoding" ] && encoding="[$encoding]"

        if [ "$url" != "$1" ]; then
            printf "%s%s %s\n" "$indent" "$url" "$encoding"
            indent="  $indent"
        else
            printf "\n  * %s %s\n" "$1" "$encoding"
            indent="    ↳ "
        fi

        # Get the next URL from the series of redirects
        tmp="$url"
        url="$( printf "%s" "$currentHeaders" |
                grep -i '^Location:' |
                cut -d':' -f2- |
                sed 's/^[[:space:]]*//' |
                tr -d '\r' )"

        # In case the `Location` header is specified as a path
        case "$url" in
            http*)
                ;;
            /*)
                # An absolute path is relative to the scheme and
                # host name of the previous URL, not to its path
                origin="$( printf "%s" "$tmp" |
                           sed -En 's/([^/]*\/\/)?([^/]*)\/?.*/\1\2/p' )"
                url="$origin$url"
                ;;
            *)
                url="$tmp$url"
                ;;
        esac

    done

}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

main() {

    # Prints the content encoding for every URL given as an argument,
    # followed by a final empty line.
    #
    # If cURL is not available, reports that on stderr and returns 1
    # without printing anything to stdout.

    declare url=""

    # Check if cURL is installed
    if ! command -v curl > /dev/null 2>&1; then
        printf "cURL is required, please install it!\n" >&2
        return 1
    fi

    for url in "$@"; do
        get_content_encoding "$url"
    done

    printf "\n"

}

# The arguments are passed on quoted so that URLs containing spaces
# or glob characters (`?`, `*`) reach `main` exactly as given
main "$@"
